from urllib import request
import re,json,os

def getpage(num):
    """Fetch page *num* of 58.com Beijing rental listings and yield one dict per listing.

    Each yielded dict carries title, layout, price and image URL (keys are the
    original display labels, so the JSON output format is unchanged).  As a side
    effect, each listing's cover image is downloaded to ./images/url<X>.jpg,
    where <X> is a per-page running counter (page 3 -> 301, 302, ...).
    On a network/decode failure the generator prints the error and stops,
    yielding nothing (same observable outcome as the old silent `return None`).
    """
    url = 'https://bj.58.com/chuzu/pn' + str(num) + '/?ClickID=2'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"
    }

    # Groups: 1=image URL (protocol-relative), 2=title, 3/4=layout info, 5=monthly price.
    # NOTE(review): this pattern is tied to the 2018-era page markup — verify it
    # still matches the live site before relying on the results.
    pat = r'<li[\s\S]*?_src="(.*?)"[\s\S]*?_blank".*?\n([\s\S]*?)</a[\s\S]*?x">(.*?)&nbsp;&nbsp;&nbsp;&nbsp;(.*?)</p[\s\S]*?x">([\d]*?)</b'

    try:
        req = request.Request(url, headers=headers)
        # Context manager closes the HTTP response instead of leaking it.
        with request.urlopen(req) as res:
            html = res.read().decode('utf-8')

        dlist = re.findall(pat, html, re.M)

        # Image filename counter, unique per page: int('3' + '01') == 301.
        x = int(str(num) + '01')
        for item in dlist:
            yield {
                ' | 标题：': item[1].strip(),
                ' | 户型：': item[2].strip() + ',' + item[3].strip(),
                ' | 价格：': item[4] + '元/月',
                ' | 图片：': item[0],
            }
            # Image URL in the page is protocol-relative ("//..."): prefix the scheme.
            request.urlretrieve('https:' + item[0], './images/url%s.jpg' % x)
            x += 1
        print("共" + str(len(dlist)) + "条数据")
    except (OSError, UnicodeDecodeError) as e:
        # URLError/HTTPError subclass OSError. Report the failure instead of
        # swallowing every exception as the old bare `except:` did.
        print('请求失败：%s' % e)
        return None

def writeFile(content):
    """Append *content* as one UTF-8 JSON line to ./58urllib.txt."""
    # ensure_ascii=False keeps Chinese text readable in the output file.
    line = json.dumps(content, ensure_ascii=False)
    with open('./58urllib.txt', 'a', encoding='utf-8') as out:
        out.write(line + "\n")

def main(num):
    """Scrape listings page *num* and persist every record to disk."""
    # getpage() downloads images into ./images, so the folder must exist first.
    os.makedirs('./images', exist_ok=True)
    for record in getpage(num):
        writeFile(record)

if __name__ == '__main__':
    # Entry point: ask which results page (1-70) to scrape.
    page = int(input('请输入你要查看的页面（1-70）：'))
    main(page)
